Chapter 9 HMSC analysis

9.1 Load data

load("data/data.Rdata")

9.2 Compute variance partitioning

# Select modelchain of interest
load("hmsc_bookdown/fit_model1_250_10.Rdata")

varpart=computeVariancePartitioning(m)

# Compute variance partitioning
varpart=computeVariancePartitioning(m)

varpart$vals %>%
   as.data.frame() %>%
   rownames_to_column(var="variable") %>%
   pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
   mutate(variable=factor(variable, levels=c("Elevation","Tree_cover", "Anthropization_cover", "logseqdepth","Random: Sampling_point", "Random: Transect"))) %>%
   group_by(variable) %>%
   summarise(mean=mean(value)*100,sd=sd(value)*100) %>%
   tt()
tinytable_9phqvwi818dp1pocqoz0
variable mean sd
Elevation 13.690604 10.655348
Tree_cover 8.543097 5.210723
Anthropization_cover 12.322244 6.369957
logseqdepth 21.619303 13.494332
Random: Sampling_point 27.126206 17.220611
Random: Transect 16.698545 12.732231
# Basal tree
varpart_tree <- genome_tree %>%
        keep.tip(., tip=m$spNames)

#Varpart table
varpart_table <- varpart$vals %>%
   as.data.frame() %>%
   rownames_to_column(var="variable") %>%
   pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
   mutate(variable=factor(variable, levels=rev(c("Elevation","Tree_cover","Anthropization_cover", "logseqdepth","Random: Sampling_point", "Random: Transect")))) %>%
   mutate(genome=factor(genome, levels=rev(varpart_tree$tip.label)))

#Phylums
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    filter(genome %in% varpart_tree$tip.label) %>%
    arrange(match(genome, varpart_tree$tip.label)) %>%
    mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
    column_to_rownames(var = "genome") %>%
    select(phylum)


colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    filter(genome %in% varpart_tree$tip.label) %>%
    arrange(match(genome, varpart_tree$tip.label)) %>%
     select(phylum, colors) %>%
    unique() %>%
    arrange(phylum) %>%
    select(colors) %>%
    pull()

# Basal ggtree
varpart_tree <- varpart_tree %>%
        force.ultrametric(.,method="extend") %>%
        ggtree(., size = 0.3)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
# Add phylum colors next to the tree tips
varpart_tree <- gheatmap(varpart_tree, phylum_colors, offset=-0.2, width=0.1, colnames=FALSE) +
   scale_fill_manual(values=colors_alphabetic)+
      labs(fill="Phylum")

#Reset fill scale to use a different colour profile in the heatmap
varpart_tree <- varpart_tree + new_scale_fill()

# Add variance stacked barplot
vertical_tree <-  varpart_tree +
        scale_fill_manual(values=c("#34738f","#cccccc","#ed8a45","#b2b530","#be3e2b","#83bb90","#f6de6c", "#122f3d"))+
        geom_fruit(
             data=varpart_table,
             geom=geom_bar,
             mapping = aes(x=value, y=genome, fill=variable, group=variable),
             pwidth = 2,
             offset = 0.05,
             width= 1,
             orientation="y",
             stat="identity",
             color = "white",
             size=0.1)+
      labs(fill="Variable")

vertical_tree

# Select desired support threshold
support=0.9
negsupport=1-support

# Basal tree
postestimates_tree <- genome_tree %>%
        keep.tip(., tip=m$spNames)

#plotBeta(hM=m, post=getPostEstimate(hM=m, parName="Beta"), param = "Support", plotTree = TRUE, covNamesNumbers=c(1,0))

# Posterior estimate table
post_beta <- getPostEstimate(hM=m, parName="Beta")$support %>%
    as.data.frame() %>%
    mutate(variable=m$covNames) %>%
    pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
    mutate(genome=factor(genome, levels=rev(postestimates_tree$tip.label))) %>%
    mutate(value = case_when(
          value >= support ~ "Positive",
          value <= negsupport ~ "Negative",
          TRUE ~ "Neutral")) %>%
    mutate(value=factor(value, levels=c("Positive","Neutral","Negative"))) %>%
    pivot_wider(names_from = variable, values_from = value) %>%
    rename(intercept=2,
             Elevation=3,
           Tree_cover=4,
           Anthropization_cover=5,
             logseqdepth=6
           ) %>%
    select(genome,intercept,Elevation,Tree_cover,Anthropization_cover,logseqdepth) %>%
    column_to_rownames(var="genome")

#Phylums
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    filter(genome %in% postestimates_tree$tip.label) %>%
    arrange(match(genome, postestimates_tree$tip.label)) %>%
    mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
    column_to_rownames(var = "genome") %>%
    select(phylum)


colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    filter(genome %in% postestimates_tree$tip.label) %>%
    arrange(match(genome, postestimates_tree$tip.label)) %>%
     select(phylum, colors) %>%
    unique() %>%
    arrange(phylum) %>%
    select(colors) %>%
    pull()

# Basal ggtree
postestimates_tree <- postestimates_tree %>%
        force.ultrametric(.,method="extend") %>%
        ggtree(., size = 0.3)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
#Add phylum colors next to the tree tips
postestimates_tree <- gheatmap(postestimates_tree, phylum_colors, offset=-0.2, width=0.1, colnames=FALSE) +
      scale_fill_manual(values=colors_alphabetic)+
      labs(fill="Phylum")

#Reset fill scale to use a different colour profile in the heatmap
postestimates_tree <- postestimates_tree + new_scale_fill()

# Add posterior significant heatmap

postestimates_tree <- gheatmap(postestimates_tree, post_beta, offset=0, width=0.5, colnames=TRUE, colnames_position="top",colnames_angle=90, colnames_offset_y=1, hjust=0) +
        scale_fill_manual(values=c("#be3e2b","#f4f4f4","#b2b530"))+
        labs(fill="Trend")

postestimates_tree +
        vexpand(.25, 1) # expand top

#Compute the residual correlation matrix
OmegaCor = computeAssociations(m)

# Reference tree (for sorting genomes)
genome_tree_subset <- genome_tree %>%
        keep.tip(., tip=m$spNames)


#Co-occurrence matrix at the animal level
supportLevel = 0.95
toPlot = ((OmegaCor[[1]]$support>supportLevel)
          + (OmegaCor[[1]]$support<(1-supportLevel))>0)*OmegaCor[[1]]$mean

matrix <- toPlot %>%
      as.data.frame() %>%
      rownames_to_column(var="genome1") %>%
      pivot_longer(!genome1, names_to = "genome2", values_to = "cor") %>%
      mutate(genome1= factor(genome1, levels=genome_tree_subset$tip.label)) %>%
      mutate(genome2= factor(genome2, levels=genome_tree_subset$tip.label)) %>%
      ggplot(aes(x = genome1, y = genome2, fill = cor)) +
            geom_tile() +
            scale_fill_gradient2(low = "#be3e2b",
                       mid = "#f4f4f4",
                       high = "#b2b530")+
            theme_void()

vtree_top <- genome_tree_subset %>%
  force.ultrametric(.,method="extend") %>%
  ggtree(layout = "rectangular") + 
  layout_dendrogram() +  # Ensure correct layout
  coord_flip()
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
vtree_left <- genome_tree_subset %>%
  force.ultrametric(.,method="extend") %>%
  ggtree(.)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
#create composite figure
grid.arrange(grobs = list(vtree_top,matrix,vtree_left),
             layout_matrix = rbind(c(4,1,1,1,1,1,1,1,1,1,1,1),
                                   c(3,2,2,2,2,2,2,2,2,2,2,2),
                                   c(3,2,2,2,2,2,2,2,2,2,2,2),
                                   c(3,2,2,2,2,2,2,2,2,2,2,2),
                                   c(3,2,2,2,2,2,2,2,2,2,2,2),
                                   c(3,2,2,2,2,2,2,2,2,2,2,2),
                                   c(3,2,2,2,2,2,2,2,2,2,2,2),
                                   c(3,2,2,2,2,2,2,2,2,2,2,2),
                                   c(3,2,2,2,2,2,2,2,2,2,2,2),
                                   c(3,2,2,2,2,2,2,2,2,2,2,2),
                                   c(3,2,2,2,2,2,2,2,2,2,2,2),
                                   c(3,2,2,2,2,2,2,2,2,2,2,2)))

9.3 Elevation predictions

gradient = seq(940, 2350, by = 100)
gradientlength = length(gradient)

#Treatment-specific gradient predictions
pred_elevation <- constructGradient(m,
                      focalVariable = "Elevation",
                      non.focalVariables = list(logseqdepth=list(1),Tree_cover=list(1), Anthropization_cover=list(1)),
                      ngrid=gradientlength) %>%
                      predict(m, Gradient = ., expected = TRUE) %>%
                      do.call(rbind,.) %>%
                      as.data.frame() %>%
                      mutate(elevation=rep(gradient,1000)) %>%
                      pivot_longer(-c(elevation), names_to = "genome", values_to = "value")

9.3.1 Responses to elevation

# Select desired support threshold
support=0.9
negsupport=1-support

#Get phylum colors from the EHI standard
phylum_colors <- genome_metadata %>%
    left_join(read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv"), by=join_by(phylum == phylum)) %>%
    arrange(match(genome, genome_tree$tip.label)) %>%
    select(phylum, colors) %>%
    unique() %>%
    arrange(phylum) %>%
    #slice(2:5) %>%
    select(colors) %>%
    pull()

getPostEstimate(hM=m, parName="Beta")$support %>%
    as.data.frame() %>%
    mutate(variable=m$covNames) %>%
    pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
    mutate(trend = case_when(
          value >= support ~ "Positive",
          value <= negsupport ~ "Negative",
          TRUE ~ "Neutral")) %>%
    filter(variable=="Elevation") %>%
    select(genome,trend) %>%
    left_join(pred_elevation, by=join_by(genome==genome)) %>%
    group_by(genome, trend, elevation) %>%
    summarize(value = mean(value, na.rm = TRUE)) %>%
    left_join(genome_metadata, by=join_by(genome == genome)) %>%
    ggplot(aes(x=elevation, y=value, group=genome, color=phylum, linetype=trend)) +
        geom_line() +
        scale_linetype_manual(values=c("solid","dashed","solid")) +
        scale_color_manual(values=phylum_colors) +
        facet_grid(fct_rev(trend) ~ phylum) +
        labs(y="Genome abundance (log)",x="Elevation") +
        theme(legend.position = "none") +
        theme_minimal() +
         theme(legend.position = "none",
               axis.text.x = element_text(angle = 45, hjust = 0.8,),
               axis.line.x = element_line(size = 0.5, linetype = "solid", colour = "black"),
)

9.4 Functional predictions

9.4.1 Element level

elements_table <- genome_gifts_filt %>%
    to.elements(., GIFT_db) %>%
    as.data.frame()

community_elements <- pred_elevation %>%
  group_by(elevation, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "elevation") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(elements_table,.,GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="elevation")
   })

calculate_slope <- function(x) {
  lm_fit <- lm(unlist(x) ~ seq_along(unlist(x)))
  coef(lm_fit)[2]
}

element_predictions <- map_dfc(community_elements, function(mat) {
      mat %>%
        column_to_rownames(var = "elevation") %>%
        t() %>%
        as.data.frame() %>%
        rowwise() %>%
        mutate(slope = calculate_slope(c_across(everything()))) %>%
        select(slope) }) %>%
      t() %>%
      as.data.frame() %>%
      set_names(colnames(community_elements[[1]])[-1]) %>%
      rownames_to_column(var="iteration") %>%
      pivot_longer(!iteration, names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)
# Positively associated
p0<-positive_filtered<-element_predictions %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) 

if (nrow(positive_filtered) > 0) {
  positive_filtered %>%
  tt() |>
      style_tt(
        i = which(element_predictions$positive_support < 0.9 & element_predictions$negative_support < 0.1),
        background = "#E5D5B1") |>
      style_tt(
        i = which(element_predictions$negative_support > 0.9 & element_predictions$positive_support < 0.1),
        background = "#B7BCCE")
}


#Negatively associated
p1<-negative_filtered<-element_predictions %>%
  filter(mean <0) %>%
  arrange(-negative_support) %>%
  filter(negative_support>=0.9) 

if (nrow(negative_filtered) > 0) {
  negative_filtered %>%
  tt() |>
      style_tt(
        i = which(element_predictions$positive_support < 0.9 & element_predictions$negative_support < 0.1),
        background = "#E5D5B1") |>
      style_tt(
        i = which(element_predictions$negative_support > 0.9 & element_predictions$positive_support < 0.1),
        background = "#B7BCCE")
}
# Positively associated
p0 %>%
  tt()
tinytable_8k957b98c4jaxo5dl2m2
trait mean p1 p9 positive_support negative_support
D0613 0.034264683 0.0113319085 0.060146258 0.971 0.029
B0106 0.017715307 0.0039875628 0.033388901 0.966 0.034
D0609 0.014654471 0.0028010131 0.027577745 0.962 0.038
D0205 0.008600377 0.0020467751 0.015207831 0.953 0.047
D0207 0.031552241 0.0069001701 0.067737031 0.949 0.051
B0802 0.037714918 0.0086104696 0.071624679 0.947 0.053
D0304 0.014938085 0.0035430830 0.027059303 0.945 0.055
B0214 0.019893750 0.0026561423 0.037824383 0.931 0.069
B0217 0.013929202 0.0015844388 0.028027942 0.925 0.075
D0817 0.004336948 0.0003403666 0.008888889 0.919 0.081
#Negatively associated
p1%>%
  tt()
tinytable_smtv371ye8ueztj6bxtr
trait mean p1 p9 positive_support negative_support
B0208 -0.0344477317 -0.0618098460 -1.039294e-02 0.016 0.984
S0104 -0.0209740208 -0.0337504690 -8.006082e-03 0.022 0.978
D0805 -0.0004581602 -0.0009344457 -6.018417e-05 0.035 0.965
B0710 -0.0061948836 -0.0108430396 -1.870104e-03 0.037 0.963
B0310 -0.0028164249 -0.0048520436 -8.904523e-04 0.043 0.957
B1029 -0.0003677198 -0.0007876956 -4.280235e-05 0.045 0.955
D0604 -0.0252076749 -0.0512963980 -5.101516e-03 0.050 0.950
D0903 -0.0047866601 -0.0089646580 -1.108197e-03 0.050 0.950
B0218 -0.0116625901 -0.0223161257 -2.061981e-03 0.053 0.947
B0703 -0.0192156802 -0.0373477215 -2.861509e-03 0.059 0.941
D0206 -0.0197688096 -0.0392643553 -3.454538e-03 0.061 0.939
D0701 -0.0051397234 -0.0101283806 -6.650890e-04 0.068 0.932
D0509 -0.0151983776 -0.0301644940 -2.157025e-03 0.070 0.930
D0103 -0.0058062776 -0.0111387262 -5.611137e-04 0.073 0.927
B0903 -0.0040583726 -0.0082299820 -3.182127e-04 0.078 0.922
B0711 -0.0116387135 -0.0239457783 -1.087371e-03 0.079 0.921
D0307 -0.0106690266 -0.0219998777 -7.160822e-04 0.083 0.917
B0702 -0.0164229499 -0.0340972191 -1.406339e-03 0.085 0.915
D0508 -0.0025923862 -0.0054092788 -1.884571e-04 0.089 0.911
D0908 -0.0351466114 -0.0789647021 -1.049356e-03 0.091 0.909
D0912 -0.0009892400 -0.0020007917 -1.425600e-05 0.092 0.908
B0303 -0.0072114512 -0.0152002291 -7.612867e-05 0.097 0.903
#Positively associated
positive <- element_predictions %>%
  filter(mean >0) %>%
  arrange(mean) %>%
  filter(positive_support>=0.9) %>%
  select(-negative_support) %>%
  rename(support=positive_support)

#Negatively associated
negative <- element_predictions %>%
  filter(mean <0) %>%
  arrange(mean) %>%
  filter(negative_support>=0.9) %>%
  select(-positive_support) %>%
  rename(support=negative_support)

all_elements <- bind_rows(positive,negative) %>%
  left_join(GIFT_db,by=join_by(trait==Code_element)) %>%
  mutate(trait=factor(trait,levels=c(rev(positive$trait),rev(negative$trait)))) %>%
  mutate(Code_function=factor(Code_function)) %>%
  mutate(element_legend=str_c(trait," - ",Element)) %>%
  mutate(function_legend=str_c(Code_function," - ",Function)) %>%
  select(trait,mean,p1,p9,element_legend,function_legend) %>% 
  unique()

gift_colors <- read_tsv("data/gift_colors.tsv") %>% 
  mutate(legend=str_c(Code_function," - ",Function))  %>% 
  filter(legend %in% all_elements$function_legend)

all_elements %>%
  ggplot(aes(x=mean, y=fct_reorder(element_legend, mean), xmin=p1, xmax=p9, color=function_legend)) +
      geom_point() +
      geom_errorbar() +
      xlim(c(-0.15,0.15)) +
      geom_vline(xintercept=0) +
      scale_color_manual(values = gift_colors$Color) +
      theme_minimal() +
      labs(x="Regression coefficient",y="Functional trait")

community_elements %>%
    bind_rows() %>%
    pivot_longer(-elevation, names_to = "trait", values_to = "value") %>%
    filter(trait %in% positive$trait) %>%
    mutate(trait=factor(trait, levels=positive$trait)) %>%
    mutate(elevation=as.numeric(elevation)) %>%
    ggplot(aes(x=elevation, y=value)) +
          geom_smooth(method = lm, formula = y ~ x, se = TRUE) +
          #geom_smooth(method = lm, formula = y ~ splines::bs(x, 3), se = TRUE) +
          facet_wrap(~trait, ncol=5, scales="free") +
          theme_minimal() +
          labs(x="Elevation",y="Metabolic Capacity Index")

9.4.1.1 GIFT test visualization

# Aggregate bundle-level GIFTs into the compound level
genome_counts_filt_filt <- tibble::rownames_to_column(genome_counts_filt_filt, var = "genome")
GIFTs_elements_filtered <- elements_table[rownames(elements_table) %in% genome_counts_filt_filt$genome, ]
GIFTs_elements_filtered <- as.data.frame(GIFTs_elements_filtered) %>%
  select_if(~ !is.numeric(.) || sum(.) != 0)

# Get community-weighed average GIFTs per sample
GIFTs_elements_community <- to.community(GIFTs_elements_filtered, genome_counts_filt_filt %>% column_to_rownames(., "genome") %>% tss(), GIFT_db)

GIFTs_elements_community %>%
    as.data.frame() %>%
    rownames_to_column(var="sample") %>%
    pivot_longer(!sample,names_to="trait",values_to="gift") %>%
    left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
    mutate(functionid = substr(trait, 1, 3)) %>%
    mutate(trait = case_when(
      trait %in% GIFT_db$Code_element ~ GIFT_db$Element[match(trait, GIFT_db$Code_element)],
      TRUE ~ trait
    )) %>%
    mutate(functionid = case_when(
      functionid %in% GIFT_db$Code_function ~ GIFT_db$Function[match(functionid, GIFT_db$Code_function)],
      TRUE ~ functionid
    )) %>%
    mutate(trait=factor(trait,levels=unique(GIFT_db$Element))) %>%
    mutate(functionid=factor(functionid,levels=unique(GIFT_db$Function))) %>%
    ggplot(aes(x=sample,y=trait,fill=gift)) +
        geom_tile(colour="white", linewidth=0.2)+
        scale_fill_gradientn(colours=rev(c("#d53e4f", "#f46d43", "#fdae61", "#fee08b", "#e6f598", "#abdda4", "#ddf1da")))+
        facet_grid(functionid ~ Elevation, scales="free",space="free") +
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
              strip.text.y = element_text(angle = 0)) +
        labs(y="Traits",x="Samples",fill="GIFT")

9.4.2 Functional level

functions_table <- elements_table %>%
    to.functions(., GIFT_db) %>%
    as.data.frame()

community_functions <- pred_elevation %>%
  group_by(elevation, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "elevation") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(functions_table,.,GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="elevation")
   })
#max-min option
calculate_slope <- function(x) {
  lm_fit <- lm(unlist(x) ~ seq_along(unlist(x)))
  coef(lm_fit)[2]
}

function_predictions <- map_dfc(community_functions, function(mat) {
      mat %>%
        column_to_rownames(var = "elevation") %>%
        t() %>%
        as.data.frame() %>%
        rowwise() %>%
        mutate(slope = calculate_slope(c_across(everything()))) %>%
        select(slope) }) %>%
      t() %>%
      as.data.frame() %>%
      set_names(colnames(community_functions[[1]])[-1]) %>%
      rownames_to_column(var="iteration") %>%
      pivot_longer(!iteration, names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)
# Positively associated
p2<-function_predictions %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) %>%
  tt() |>
      style_tt(
        i = which(function_predictions$positive_support < 0.9 & function_predictions$negative_support < 0.1),
        background = "#E5D5B1") |>
      style_tt(
        i = which(function_predictions$negative_support > 0.9 & function_predictions$positive_support < 0.1),
        background = "#B7BCCE")

# Negatively associated (there isn't)
#function_predictions %>%
  #filter(mean <0) %>%
  #arrange(-negative_support) %>%
  #filter(negative_support>=0.9) %>%
  #tt() |>
      #style_tt(
        #i = which(function_predictions$positive_support < 0.9 & function_predictions$negative_support < 0.1),
        #background = "#E5D5B1") |>
      #style_tt(
        #i = which(function_predictions$negative_support > 0.9 & function_predictions$positive_support < 0.1),
        #background = "#B7BCCE")
# Positively associated
p2 %>%
  tt()
tinytable_fovnjr9bics903tetu1c
trait mean p1 p9 positive_support negative_support
S03 0.026906176 0.0104593848 0.044524683 0.973 0.027
B04 0.018264988 0.0051591436 0.033137776 0.970 0.030
D03 0.010497822 0.0021459296 0.019986199 0.943 0.057
D06 0.003285998 0.0004532722 0.006338923 0.933 0.067
B07 0.012228797 0.0004053295 0.024691062 0.906 0.094
#Negatively associated (there isn't)
all_functions <- function_predictions %>%
  left_join(GIFT_db,by=join_by(trait==Code_function)) %>%
  mutate(trait=factor(trait)) %>%
  mutate(function_legend=str_c(trait," - ",Function)) %>%
  select(trait,mean,p1,p9,function_legend) %>% 
  unique()

gift_colors <- read_tsv("data/gift_colors.tsv") %>% 
  mutate(legend=str_c(Code_function," - ",Function))  %>% 
  filter(legend %in% all_functions$function_legend)

all_functions %>%
  ggplot(aes(x=mean, y=fct_reorder(function_legend, mean), xmin=p1, xmax=p9, color=function_legend)) +
      geom_point() +
      geom_errorbar() +
      xlim(c(-0.05,0.06)) +
      geom_vline(xintercept=0) +
      scale_color_manual(values = gift_colors$Color) +
      theme_minimal() +
      labs(x="Regression coefficient",y="Functional trait") +
      guides(col = guide_legend(ncol = 1))

community_functions %>%
    bind_rows() %>%
    pivot_longer(-elevation, names_to = "trait", values_to = "value") %>%
  filter(trait %in% function_predictions$trait) %>%
  mutate(trait=factor(trait, levels=function_predictions$trait)) %>%
    mutate(elevation=as.numeric(elevation)) %>%
    ggplot(aes(x=elevation, y=value)) +
          geom_smooth(method = lm, formula = y ~ x, se = TRUE) +
          #geom_smooth(method = lm, formula = y ~ splines::bs(x, 3), se = TRUE) +
          facet_wrap(~trait, ncol=5, scales="free") +
          theme_minimal() +
          labs(x="Elevation",y="Metabolic Capacity Index")

9.4.2.1 GIFT test visualization

# Aggregate element-level GIFTs into the function level
GIFTs_functions <- to.functions(GIFTs_elements_filtered, GIFT_db)
functions <- GIFTs_functions %>%
  as.data.frame()

# Get community-weighed average GIFTs per sample
GIFTs_functions_community <- to.community(GIFTs_functions, genome_counts_filt_filt %>% column_to_rownames(., "genome") %>% tss(), GIFT_db)

GIFTs_functions_community %>%
    as.data.frame() %>%
    rownames_to_column(var="sample") %>%
    pivot_longer(!sample,names_to="trait",values_to="gift") %>%
    left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
    ggplot(aes(x=trait,y=sample,fill=gift)) +
        geom_tile(colour="white", size=0.2)+
        scale_fill_gradientn(colours=rev(c("#d53e4f", "#f46d43", "#fdae61", "#fee08b", "#e6f598", "#abdda4", "#ddf1da")))+
        facet_grid(Elevation ~ ., scales="free",space="free")

9.4.3 Domain level

domains_table <- functions_table %>%
    to.domains(., GIFT_db) %>%
    as.data.frame()

community_domains <- pred_elevation %>%
  group_by(elevation, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "elevation") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(domains_table,.,GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="elevation")
   })
#max-min option
calculate_slope <- function(x) {
  lm_fit <- lm(unlist(x) ~ seq_along(unlist(x)))
  coef(lm_fit)[2]
}

domain_predictions <- map_dfc(community_domains, function(mat) {
      mat %>%
        column_to_rownames(var = "elevation") %>%
        t() %>%
        as.data.frame() %>%
        rowwise() %>%
        mutate(slope = calculate_slope(c_across(everything()))) %>%
        select(slope) }) %>%
      t() %>%
      as.data.frame() %>%
      set_names(colnames(community_domains[[1]])[-1]) %>%
      rownames_to_column(var="iteration") %>%
      pivot_longer(!iteration, names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)
# Positively associated
p3<-positive_filtered<-domain_predictions %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) 

if (nrow(positive_filtered) > 0) {
  positive_filtered %>%
  tt() |>
      style_tt(
        i = which(domain_predictions$positive_support < 0.9 & domain_predictions$negative_support < 0.1),
        background = "#E5D5B1") |>
      style_tt(
        i = which(domain_predictions$negative_support > 0.9 & domain_predictions$positive_support < 0.1),
        background = "#B7BCCE")
}

# Negatively associated (there isn't)
#domain_predictions %>%
  #filter(mean <0) %>%
  #arrange(-negative_support) %>%
  #filter(negative_support>=0.9) %>%
  #tt() |>
      #style_tt(
        #i = which(domain_predictions$positive_support < 0.9 & domain_predictions$negative_support < 0.1),
        #background = "#E5D5B1") |>
      #style_tt(
        #i = which(domain_predictions$negative_support > 0.9 & domain_predictions$positive_support < 0.1),
        #background = "#B7BCCE")
# Positively associated
p3 %>%
  tt()
tinytable_uqext4shuflkmv3ybh2q
trait mean p1 p9 positive_support negative_support
Overall 0.006863701 0.0003939157 0.01509651 0.917 0.083
# Negatively associated (there isn't)
all_domains <- domain_predictions %>%
  left_join(GIFT_db,by=join_by(trait==Code_function)) %>%
  mutate(trait=factor(trait)) %>%
  mutate(function_legend=str_c(trait," - ",Function)) %>%
  select(trait,mean,p1,p9) %>% 
  unique()

all_domains %>%
  ggplot(aes(x=mean, y=fct_reorder(trait, mean), xmin=p1, xmax=p9, color=trait)) +
      geom_point() +
      geom_errorbar() +
      xlim(c(-0.02,0.03)) +
      geom_vline(xintercept=0) +
      theme_minimal() +
      labs(x="Regression coefficient",y="Domain level") +
      guides(col = guide_legend(ncol = 1))

community_domains %>%
    bind_rows() %>%
    pivot_longer(-elevation, names_to = "trait", values_to = "value") %>%
  filter(trait %in% domain_predictions$trait) %>%
  mutate(trait=factor(trait, levels=domain_predictions$trait)) %>%
    mutate(elevation=as.numeric(elevation)) %>%
    ggplot(aes(x=elevation, y=value)) +
          geom_smooth(method = lm, formula = y ~ x, se = TRUE) +
          #geom_smooth(method = lm, formula = y ~ splines::bs(x, 3), se = TRUE) +
          facet_wrap(~trait, ncol=5, scales="free") +
          theme_minimal() +
          labs(x="Elevation",y="Metabolic Capacity Index")

9.4.3.1 GIFT test visualization

# Aggregate function-level GIFTs into overall Biosynthesis, Degradation and Structural GIFTs
GIFTs_domains <- to.domains(GIFTs_functions, GIFT_db)
domains <- GIFTs_domains %>%
  as.data.frame()

# Get community-weighed average GIFTs per sample
GIFTs_domains_community <- to.community(GIFTs_domains, genome_counts_filt_filt %>% column_to_rownames(., "genome") %>% tss(), GIFT_db)

GIFTs_domains_community %>%
    as.data.frame() %>%
    rownames_to_column(var="sample") %>%
    pivot_longer(!sample,names_to="trait",values_to="gift") %>%
    left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
    ggplot(aes(x=trait,y=sample,fill=gift)) +
        geom_tile(colour="white", size=0.2)+
        scale_fill_gradientn(colours=rev(c("#d53e4f", "#f46d43", "#fdae61", "#fee08b", "#e6f598", "#abdda4", "#ddf1da")))+
        facet_grid(Elevation ~ ., scales="free",space="free")